rm(list=ls())
library(tidyverse)
setwd("~/Downloads/dataverse_files(3)")

getmode <- function(v, na.omit = TRUE){
  if (na.omit == TRUE) {
    v <- na.omit(v)  
  }
  uv <- unique(v)
  tab <- tabulate(match(v, uv))
  out <- uv[tab == max(tab)]
  if(length(out) > 1){
    out <- sample(out,1)
  }
  return(out)
}

# add custom functions to extract estimates (tidy) and goodness-of-fit (glance) information
tidy.feglm <- function(x, ...) {
  s <- summary(x,  type = "sandwich")
  ret <- data.frame(
    term      = row.names(s$cm),
    estimate  = s$cm[, 1],
    std.error  = s$cm[, 2]
  )
  ret
}

glance.feglm <- function(x, ...) {
  ret <- data.frame(
    Null.Dev = x$null.deviance,
    Deviance = x$deviance,
    N   = x$nobs[4],
    `Committee FE` = as.integer(length(x$nms.fe[[1]])),
    `Congress FE` = as.integer(length(x$nms.fe[[2]])))
  ret
}


hearings<- read_csv("Data/CongressDocumentsCitations.csv")
num_ref <- hearings$dois_cited %>% unique() %>% length()

num_linked_refs <-  hearings$id %>% unique() %>% length()
num_linked_refs
num_ref
#42792
###### document level dataset
hearings$party_control <- fct_relevel(hearings$party_control, "R", "D")

#For SI 1
hearings %>% group_by(Chamber, congress) %>% summarise(num = length(unique(policy_document_id))) %>%
             ggplot(aes(x=congress, y=num, group = Chamber, linetype=Chamber)) + geom_line() + theme_minimal() + 
  xlab("Congress") + ylab("Number of Documents") -> congdoc

ggsave("Output/FigS1B.pdf", device = "pdf", width = 6, height = 6)

hearings %>% dplyr::select(policy_document_id, Committee_short) %>% unique() %>%nrow()

hearings %>% dplyr::select(policy_document_id, Committee_short) %>% unique() %>% group_by(policy_document_id) %>%
  summarise(num_committees = length(unique(Committee_short))) %>% filter(num_committees > 1) %>% nrow()

hearings$policy_document_id %>% unique() %>% length()

#49152

hearings <- hearings %>% mutate(cong_doc_type = case_when(grepl("event", hearings$policy_document_url) == T ~ "Hearing",
                                                          grepl("report", hearings$policy_document_url) == T ~ "Report",
                                                          grepl("print", hearings$policy_document_url) == T ~ "Publication", 
                                                          TRUE ~ NA))


documents <- read_csv("Data/CongressDocumentsAnalysisData.csv")
###### Figure 2A

documents$party_control <- fct_relevel(documents$party_control, "R", "D")






##### Figure 1B
library(sjPlot)
library(sjmisc)
library(sjstats)
library(ggplot2)
library(modelsummary)

documents2 <- documents %>% mutate(congress = as.numeric(as.factor(congress)))
documents2 <- documents2 %>% mutate(cong_comm = paste0(congress, committee_short))
documents2$party_control <- fct_relevel(documents2$party_control, "R", "D")

doc_cites_model_time <- glm(cites_science ~ party_control*congress + committee_short , data = documents2, family = binomial(link = "logit"))

library(lme4)

doc_cites_model_time_mixed <- glmer(cites_science ~ party_control*congress +
             (1 | cong_comm), data = documents2, family = binomial, control = glmerControl(optimizer = "bobyqa"),
           nAGQ = 1)

doc_cites_model_time112 <- glm(cites_science ~ party_control*congress + committee_short , data = filter(documents2, congress > 9), family = binomial(link = "logit"))


time_pred_plot_mixed <- plot_model(doc_cites_model_time_mixed, type = "pred", terms = c("congress", "party_control")) + 
  ylab("Probability that a committee document cites science") + xlab("Congress") + 
  scale_x_continuous("Congress", breaks = c(1,3,5,7,9,11,13), labels = c("104", "106", "108", "110", "112", "114", "116")) +
  ggtitle("") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + 
  scale_fill_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + theme_minimal(base_size = 7) + theme(legend.position = "bottom") 

ggsave("Output/Fig1B.pdf", time_pred_plot_mixed, device = "pdf", width = 4, height = 4, units="in")


modelsummary(list("Mixed Effects" = doc_cites_model_time_mixed, "All Congresses" = doc_cites_model_time, "113th-116th Congresses" =doc_cites_model_time112), coef_omit = "committee_short", output = "Output/TableS8.docx")



hearings_doc_cites_model_time <- glm(cites_science ~ party_control*congress + committee_short, data = filter(documents2, cong_doc_type == "Hearing"), family = binomial(link = "logit"))
report_doc_cites_model_time <- glm(cites_science ~ party_control*congress + committee_short, data = filter(documents2, cong_doc_type == "Report"), family = binomial(link = "logit"))
house_doc_cites_model_time <- glm(cites_science ~ party_control*congress + committee_short, data = filter(documents2, chamber == "House"), family = binomial(link = "logit"))
senate_doc_cites_model_time <- glm(cites_science ~ party_control*congress + committee_short, data = filter(documents2,  chamber == "Senate"), family = binomial(link = "logit"))




hearings_doc_cites_model_plot <- plot_model(hearings_doc_cites_model_time, type = "pred", terms = c("congress", "party_control")) + 
  ylab("Probability that a committee document cites science") + xlab("Congress") + 
  scale_x_continuous("Congress", breaks = c(1,3,5,7,9,11,13), labels = c("104", "106", "108", "110", "112", "114", "116")) +
  ggtitle("") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + 
  scale_fill_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + theme_minimal(base_size = 7) + theme(legend.position = "bottom") +
  ggtitle("Hearings")

reports_doc_cites_model_plot <- plot_model(report_doc_cites_model_time, type = "pred", terms = c("congress", "party_control")) + 
  ylab("Probability that a committee document cites science") + xlab("Congress") + 
  scale_x_continuous("Congress", breaks = c(1,3,5,7,9,11,13), labels = c("104", "106", "108", "110", "112", "114", "116")) +
  ggtitle("") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + 
  scale_fill_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + theme_minimal(base_size = 7) + theme(legend.position = "bottom") +
  ggtitle("Reports")

house_doc_cites_model_plot <- plot_model(house_doc_cites_model_time, type = "pred", terms = c("congress", "party_control")) + 
  ylab("Probability that a committee document cites science") + xlab("Congress") + 
  scale_x_continuous("Congress", breaks = c(1,3,5,7,9,11,13), labels = c("104", "106", "108", "110", "112", "114", "116")) +
  ggtitle("") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + 
  scale_fill_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + theme_minimal(base_size = 7) + theme(legend.position = "bottom") +
  ggtitle("House")

senate_doc_cites_model_plot <- plot_model(senate_doc_cites_model_time, type = "pred", terms = c("congress", "party_control")) + 
  ylab("Probability that a committee document cites science") + xlab("Congress") + 
  scale_x_continuous("Congress", breaks = c(1,3,5,7,9,11,13), labels = c("104", "106", "108", "110", "112", "114", "116")) +
  ggtitle("") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + 
  scale_fill_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + theme_minimal(base_size = 7) + theme(legend.position = "bottom") +
  ggtitle("Senate")


library(cowplot)

congress_heterogeneity_plot = cowplot::plot_grid(hearings_doc_cites_model_plot, reports_doc_cites_model_plot, house_doc_cites_model_plot,
                                                 senate_doc_cites_model_plot, labels = "auto")

ggsave("Output/FigS5.pdf", congress_heterogeneity_plot, device = "pdf", width = 8, height = 8, units="in")

modelsummary(list("Hearings"= hearings_doc_cites_model_time, "Reports"= report_doc_cites_model_time,
                  "House"= house_doc_cites_model_time, "Senate"=senate_doc_cites_model_time), coef_omit = "committee_short", output = "Output/CommitteeHeterogeneityPartyTime.docx")



theme_set(theme_gray())

#### Overall Difference in Citation to Science 
documents$party_control <- fct_relevel(documents$party_control, "R", "D")
doc_cites_model <- fixest::feglm(cites_science ~ party_control |  committee_short + congress , data = documents, family = binomial(link = "logit"))

doc_cites_model_post112 <- fixest::feglm(cites_science ~ party_control |  committee_short + congress , data = filter(documents, congress > 112), family = binomial(link = "logit"))


modelsummary(list("All Congresses" = doc_cites_model, "Post 112th Congress" = doc_cites_model_post112), coef_omit = "committee_short", output = "Output/TableS9.docx")





report_doc_cites_model <- fixest::feglm(cites_science ~ party_control |  committee_short + congress, data = filter(documents, cong_doc_type == "Report"), family = binomial(link = "logit"))
hearing_doc_cites_model <- fixest::feglm(cites_science ~ party_control |  committee_short + congress, data = filter(documents,  cong_doc_type == "Hearing"), family = binomial(link = "logit"))
house_doc_cites_model <- fixest::feglm(cites_science ~ party_control |  committee_short, data = filter(documents, chamber == "House"), family = binomial(link = "logit"))
senate_doc_cites_model <- fixest::feglm(cites_science ~ party_control |  committee_short, data = filter(documents, chamber == "Senate"), family = binomial(link = "logit"))

tmp1 <- report_doc_cites_model %>% tidy() %>%  mutate(term = "Democratic Control", model = "Reports") 
tmp2 <- hearing_doc_cites_model %>% tidy() %>%  mutate(term = "Democratic Control", model = "Hearings") 
tmp3 <- house_doc_cites_model %>% tidy() %>%  mutate(term = "Democratic Control", model = "House") 
tmp4 <- senate_doc_cites_model %>% tidy() %>%  mutate(term = "Democratic Control", model = "Senate" ) 

congress_main_hetero <- bind_rows(tmp1, tmp2, tmp3, tmp4)

hetero_doc_cites_model_plot <- congress_main_hetero %>%
  ggplot(aes(x = model, y=exp(estimate), ymax = exp(estimate + 1.96*std.error), ymin = exp(estimate - 1.96*std.error))) +
  geom_point(size=3, color = "#156B90") + geom_errorbar(width=0, size=2, color = "#156B90") +geom_hline(yintercept = 1, linetype = 3) + 
  theme_minimal(base_size = 12) + ylab("Odds Ratio") + xlab("Democratic Control") +
  ggtitle("")  + theme(legend.position = "bottom") +xlab("") 

modelsummary(list("Reports" = report_doc_cites_model, "Hearings" = hearing_doc_cites_model, 
                  "House" = house_doc_cites_model, "Senate" = senate_doc_cites_model), coef_omit = "committee_short", output = "Output/TableS10.docx")

ggsave("Output/FigS4.pdf", hetero_doc_cites_model_plot, device = "pdf", width = 4, height = 4, units="in")

documents <-documents %>% mutate(pres_party = case_when(congress %in% c(104:106, 111:114,117,118) ~ "D", 
                                                        congress %in% c(107:110, 115,116) ~ "R"))

documents <- documents %>% mutate(coparty_pres = case_when(party_control == pres_party ~ 1,
                                                           party_control != pres_party ~ 0))

doc_cites_model2 <- fixest::feglm(cites_science ~ party_control + coparty_pres + chamber + cong_doc_type | committee_short + congress , data = documents, family = binomial(link = "logit"))
modelsummary(doc_cites_model2, coef_omit = "committee_short", output = "Output/TableS19.docx")


#### By FOR


load("Data/Committee_documents_FOR.RData")

cited_papers_FOR <- read_csv("Data/CongressPaperFOR.csv") %>% dplyr::select(-`...1`) %>% filter(for_field_id < 100) %>% unique()


hearings_FOR <- hearings %>% left_join(cited_papers_FOR, by = c("id" = "paper_id")) %>% unique()
hearings_FOR <- hearings_FOR %>% rename(FOR_code = for_field_id)


FORs <- hearings_FOR$FOR_code %>% unique()


documents_FOR <- lapply(documents_FOR, function(i)  mutate(i, party_control = fct_relevel(party_control, "R", "D"))) 

FOR_models <- lapply(documents_FOR, function(i)  fixest::feglm(cites_science ~ party_control | committee_short + congress, data = i, family = binomial(link = "logit"))) 

library(broom)
FOR_results <- do.call(rbind, lapply(FOR_models, function(i) tidy(i))) %>% as_tibble()
FOR_results$FOR_code <- FORs

for_labels <- read_csv("Data/Dimensions-Fields-of-Research-2020-10-28_14-28-57.csv") 
for_labels <- for_labels %>% dplyr::select(Name, `Fields of Research code`)
names(for_labels) <- c("FOR_text", "FOR_code")
for_labels$FOR_code <- as.numeric(for_labels$FOR_code)
FOR_results$FOR_code <- as.numeric(FOR_results$FOR_code)

FOR_results <- FOR_results %>% left_join(for_labels)


FOR_results$FOR_text <- as.character(FOR_results$FOR_text)
FOR_results$FOR_text[is.na(FOR_results$FOR_text)] <- "Unknown"

FOR_results <- FOR_results %>% 
mutate(FOR_text = str_replace_all(FOR_text, "and", "&"))

FOR_results$FOR_text <- fct_reorder(FOR_results$FOR_text, FOR_results$estimate, .fun =mean)

for_cc_plot <- ggplot(FOR_results, aes(x=FOR_text, group = FOR_text, y=estimate, ymin = estimate - 1.96*std.error, ymax = estimate + 1.96*std.error)) + 
  geom_point(color = "#156B90", size =3 ) +
  geom_errorbar(size=2, width=0,alpha=.65, colour = "#156B90") + 
  coord_flip() + theme_minimal(base_size = 7) + 
  geom_hline(yintercept = 0, linetype = 3) +
  ylab("Log odds") +xlab("")

ggsave("Output/FigS6A.pdf", for_cc_plot, device = "pdf", width = 8, height = 4, units="in")



FOR_models <- as.list(FOR_models)

names(FOR_models) <- FOR_results$FOR_text

modelsummary(FOR_models, output = "Output/TableS11-12.docx")

###### DID

committee_congress_citations <- hearings %>% group_by(congress,Committee_short) %>% 
  dplyr::summarise(sci_cites = length(unique(dois_cited[!is.na(dois_cited)])), 
                   party_control = getmode(party_control),
                   paper_mean_cites = mean(times_cited, na.rm = T),
                   paper_mean_altmetric = mean(altmetric, na.rm = T),
                   num_hearings = length(unique(policy_document_id[cong_doc_type == "Hearing"])),
                   num_reports = length(unique(policy_document_id[cong_doc_type == "Report"])),
                   num_publications = length(unique(policy_document_id[cong_doc_type == "Publication"])))

committee_congress_citations$treatment <- NA
committee_congress_citations$treatment[committee_congress_citations$party_control == "D"] <- 1
committee_congress_citations$treatment[committee_congress_citations$party_control == "R"] <- 0
committee_congress_citations <- committee_congress_citations %>% filter(!is.na(Committee_short))

library(DIDmultiplegt)

dev.off()
did_out <- did_multiplegt(committee_congress_citations, "sci_cites", "Committee_short", "congress", "treatment", placebo = 2, dynamic = 2,
               threshold_stable_treatment = 0, recat_treatment = NULL,
               trends_nonparam = NULL, trends_lin = NULL,
               brep = 100, cluster = "Committee_short", covariance = FALSE, average_effect = NULL,
               parallel = F, mode = "old")

did_out$effect

dev.copy(pdf, "Output/FigS9.pdf", width = 8, height = 6)
dev.off()


library(fect)


library(panelView)
library(patchwork)
committee_panel <- panelview(sci_cites ~ treatment, data = committee_congress_citations, index = c("Committee_short","congress"), 
          axis.lab = "time", xlab = "Congress", ylab = "Committees", 
          background = "white", main = "Partisan Control of Committees")

ggsave("Output/FigS10.pdf", committee_panel, width = 6, height =4)


#FEct
out.fect <- fect(sci_cites ~ treatment + num_reports +num_hearings + num_publications, data = committee_congress_citations, index = c("Committee_short","congress"), 
                 method = "fe", force = "two-way",  se = TRUE, parallel = TRUE, nboots = 200)

print(out.fect)

#calculate effect size for main text
out.fect$est.att["1","ATT"]/sd(committee_congress_citations$sci_cites)



fect_comm_plot <- plot(out.fect, main = "Estimated ATT (FEct)", ylab = "Effect of party control (D) on citations to science", 
     cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)

ggsave("Output/FigS12.pdf", fect_comm_plot, width = 6, height =4)

pre_trend_FE <- plot(out.fect, type = "equiv", 
     cex.legend = 0.6, main = "Testing Pre-Trend (FEct)", cex.text = 0.8)

ggsave("Output/FigS11.pdf", pre_trend_FE, width = 6, height =4)


out.fect.p <- fect(sci_cites ~ treatment + num_reports +num_hearings + num_publications, data = committee_congress_citations, index = c("Committee_short","congress"), 
                   force = "two-way", parallel = TRUE, se = TRUE, CV = 0,
                   nboots = 200, placeboTest = TRUE, placebo.period = c(-2, 0))

placebo_FE <- plot(out.fect.p, cex.text = 0.8, stats = c("placebo.p","equiv.p"), main = "Estimated ATT (FE)")
ggsave("Output/CommitteeUnitFECT_DID_placebo.pdf", placebo_FE, width = 6, height =4)




######## trend by hit paper

munge_doc_hit <- function(dataset, hit_val) {
  ret <- dataset %>% group_by(policy_document_id) %>% summarise(party_control = getmode(party_control),
                                                                committee_short = getmode(Committee_short),
                                                                congress = getmode(congress), 
                                                                num_citations = length(dois_cited[!is.na(dois_cited) & hit_5 == hit_val & !is.na(hit_5)]),
                                                                cites_science = case_when( num_citations > 0 ~ 1,
                                                                                           TRUE ~ 0),
                                                                hit_5 = hit_val)
  return(ret)
}


HITs <- hearings$hit_5 %>% unique()


documents_HIT <- lapply(HITs, function(i) munge_doc_hit(hearings, hit_val = i))

library(sjPlot)
library(sjmisc)
library(ggplot2)
library(modelsummary)
theme_set(theme_sjplot())

documents_HIT[[2]] <- documents_HIT[[2]] %>% mutate(congress = as.numeric(as.factor(congress)), cong_comm = paste(congress, committee_short, sep = "_"))


doc_nohit_model_time_mixed <- glmer(cites_science ~ party_control*congress +
                                      (1 | cong_comm), data = documents_HIT[[2]], family = binomial, control = glmerControl(optimizer = "bobyqa"),
                                    nAGQ = 1)

nohit_time_pred_plot <- plot_model(doc_nohit_model_time_mixed, type = "pred", terms = c("congress [all]", "party_control")) + ylab("Probability that a committee document cites non-hit science")  +
  ggtitle("Cites to non-hit scientific papers") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + theme(legend.position = "bottom")  +theme_minimal() +
  theme(legend.position = "bottom") + scale_x_continuous("Congress", breaks = c(2,7,12), labels = c("104", "109", "114"))



documents_HIT[[3]] <- documents_HIT[[3]] %>% mutate(congress = as.numeric(as.factor(congress)),  cong_comm = paste(congress, committee_short, sep = "_"))

doc_hit_model_time_mixed <- glmer(cites_science ~ party_control*congress +
                                      (1 | cong_comm), data = documents_HIT[[3]], family = binomial, control = glmerControl(optimizer = "bobyqa"),
                                    nAGQ = 1)
hit_time_pred_plot <- plot_model(doc_hit_model_time_mixed, type = "pred", terms = c("congress [all]", "party_control")) + ylab("Probability that a committee document cites hit science") + xlab("Year") +
  ggtitle("Cites to hit (top 5%) scientific papers") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90"))  +theme_minimal() +
  theme(legend.position = "bottom") + scale_x_continuous("Congress", breaks = c(2,7,12), labels = c("104", "109", "114"))

library(cowplot)


###post 112


doc_nohitcites_model_time_post112 <- glm(cites_science ~ party_control*congress + committee_short , data = filter(documents_HIT[[2]], congress > 9), family = binomial(link = "logit"))


nohit_time_pred_plot_post112 <- plot_model(doc_nohitcites_model_time_post112, type = "pred", terms = c("congress [all]", "party_control")) + ylab("Probability that a committee document cites non-hit science")  +
  ggtitle("Cites to non-hit scientific papers") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90")) + theme(legend.position = "bottom")  +theme_minimal() +
  theme(legend.position = "bottom") + scale_x_continuous("Congress", breaks = c(10,12), labels = c("113", "115"))


documents_HIT[[3]] <- documents_HIT[[3]] %>% mutate(congress = as.numeric(as.factor(congress)))

doc_hitcites_model_time_post112 <- glm(cites_science ~ party_control*congress + committee_short , data = filter(documents_HIT[[3]], congress >9), family = binomial(link = "logit"))

hit_time_pred_plot_post112 <- plot_model(doc_hitcites_model_time_post112, type = "pred", terms = c("congress [all]", "party_control")) + ylab("Probability that a committee document cites hit science") + xlab("Year") +
  ggtitle("Cites to hit (top 5%) scientific papers") + scale_color_manual("", labels  = c("Rep", "Dem"), values=c("#9A3E25","#156B90"))  +theme_minimal() +
  theme(legend.position = "bottom") + scale_x_continuous("Congress", breaks = c(10,12), labels = c("113", "115"))



hit_time_pred_plot_all  <- cowplot::plot_grid(hit_time_pred_plot, nohit_time_pred_plot, hit_time_pred_plot_post112 , nohit_time_pred_plot_post112 , nrow = 2, labels = "auto")

ggsave("Output/FigS7.pdf", hit_time_pred_plot_all , device = "pdf", width = 8, height = 8, units="in")

modelsummary(list("Hit 5 Multilevel" = doc_hit_model_time_mixed, "Non-Hit Multilevel" = doc_nohit_model_time_mixed, "Hit 5 Post 112th" = doc_hitcites_model_time_post112 , "Non-Hit Post 112th" = doc_nohitcites_model_time_post112 ), output = "HitCommitteePartyTime_post112.docx")








######### By Issue area

classifications <- read_csv("Data/CongressDocumentClassifications.csv")

documents <- read_csv("Data/CongressDocumentsAnalysisData.csv")

documents <- documents %>% left_join(classifications)

doc_issues <- documents %>% split(.$class1)


doc_issues <- lapply(doc_issues, function(i)  mutate(i, party_control = fct_relevel(party_control, "R", "D"))) 

issue_models <- lapply(doc_issues, function(i)  fixest::feglm(cites_science ~ party_control | committee_short + congress, data = i, family = binomial(link = "logit"))) 

library(broom)
issue_results <- do.call(rbind, lapply(issue_models, function(i) tidy(i))) %>% as_tibble()

issue_results$issue <- names(doc_issues)

issue_results <- issue_results %>% 
  mutate(issue = str_replace_all(issue, "and", "&"))

issue_results$issue <- fct_reorder(issue_results$issue, issue_results$estimate, .fun =mean)

issue_cc_plot <- ggplot(issue_results, aes(x=issue, group = issue, y=estimate, ymin = estimate - 1.96*std.error, ymax = estimate + 1.96*std.error)) + 
  geom_point(color = "#156B90", size =3 ) +
  geom_errorbar(size=2, width=0,alpha=.65, colour = "#156B90") + 
  coord_flip() + theme_minimal(base_size = 7) + 
  geom_hline(yintercept = 0, linetype = 3) +
  ylab("Log odds") +xlab("")

ggsave("Output/FigS6B.pdf", issue_cc_plot, device = "pdf", width = 6, height = 4, units="in")




issue_models <- as.list(issue_models)

names(issue_models) <- issue_results$issue

modelsummary(issue_models, output = "Output/TableS13.docx")


###### By FOR * Issue 





hearings <- hearings %>% left_join(classifications)

hearings <- hearings %>% mutate(dem = case_when(party_control == "D" ~ 1,
                                                party_control == "R" ~ 0),
                                rep = case_when(party_control == "D" ~ 0,
                                                party_control == "R" ~ 1))




cited_papers_FOR <- read_csv("Data/CongressPaperFOR.csv") %>% dplyr::select(-`...1`) %>% filter(for_field_id < 100) %>% unique()


hearings_FOR <- hearings %>% left_join(cited_papers_FOR, by = c("id" = "paper_id")) %>% unique()
hearings_FOR <- hearings_FOR %>% rename(FOR_code = for_field_id)
for_labels <- read_csv("Data/Dimensions-Fields-of-Research-2020-10-28_14-28-57.csv") 
for_labels <- for_labels %>% dplyr::select(Name, `Fields of Research code`)
names(for_labels) <- c("FOR_text", "FOR_code")
for_labels$FOR_code <- as.numeric(for_labels$FOR_code)
hearings_FOR$FOR_code <- as.numeric(hearings_FOR$FOR_code)

hearings_FOR <- hearings_FOR %>% left_join(for_labels)
hearings_FOR
table(hearings_FOR$class1, hearings_FOR$FOR_text)

demrep <- hearings %>% group_by(dois_cited) %>% summarise(dem = sum(dem, na.rm =T), rep = sum(rep,na.rm =T)) %>% mutate(cite_type = case_when(dem > 0 & rep > 0 ~ "Both",
                                                                                                                                              rep > 0 ~ "R", 
                                                                                                                                              dem > 0 ~ "D")) %>% dplyr::select(-dem, -rep)

hearings_FOR <- hearings_FOR %>% left_join(demrep)



overlap<- hearings_FOR %>% filter(!is.na(FOR_code))  %>% group_by(FOR_text, class1 ) %>% 
  summarise(both = length(unique(dois_cited[cite_type == "Both"])),
                                                                     D = length(unique(dois_cited[cite_type == "D"])),
                                                                     R = length(unique(dois_cited[cite_type == "R"])),
                                                                     All = length(unique(dois_cited)),
                                                                     both_share = both/All, 
                                                                     D_share = D/All, 
                                                                     R_share = R/All)




overlap_FOR <- hearings_FOR %>% filter(!is.na(FOR_code)) %>% dplyr::select(dois_cited, cite_type, FOR_text) %>% group_by(FOR_text) %>% 
  summarise(both = length(unique(dois_cited[cite_type == "Both"])),
            D = length(unique(dois_cited[cite_type == "D"])),
            R = length(unique(dois_cited[cite_type == "R"])),
            All = length(unique(dois_cited)),
            both_share = both/All, 
            D_share = D/All, 
            R_share = R/All)

overlap_class <- hearings_FOR %>% filter(!is.na(FOR_code)) %>% dplyr::select(dois_cited, cite_type, class1) %>% group_by(class1) %>% 
  summarise(both = length(unique(dois_cited[cite_type == "Both"])),
            D = length(unique(dois_cited[cite_type == "D"])),
            R = length(unique(dois_cited[cite_type == "R"])),
            All = length(unique(dois_cited)),
            both_share = both/All, 
            D_share = D/All, 
            R_share = R/All)


overlap_class <- overlap_class %>% mutate(class1 = fct_reorder(class1, both_share)) %>% filter(!is.na(class1))
class_both <- overlap_class %>% ggplot(aes(x=both_share, y = class1)) + geom_point() + theme_minimal() + scale_x_continuous(label = scales::percent) +xlab("Share of science cited by both R and D") + ylab("")


overlap_FOR <- overlap_FOR %>% mutate(FOR_text = fct_reorder(FOR_text, both_share)) %>% filter(!is.na(FOR_text))
FOR_both <- overlap_FOR %>% ggplot(aes(x=both_share, y = FOR_text)) + geom_point() + theme_minimal() + scale_x_continuous(label = scales::percent) +xlab("Share of science cited by both R and D") + ylab("")

library(cowplot)
for_class_overlap <- cowplot::plot_grid(FOR_both, class_both, nrow = 1)


ggsave("Output/FigS17.pdf", for_class_overlap, width = 10, height = 4)

######### time series overlap
hearings<- read_csv("Data/CongressDocumentsCitations.csv")

hearings

hearings$party_control <- fct_relevel(hearings$party_control, "R", "D")

hearings <- hearings %>% mutate(dem = case_when(party_control == "D" ~ 1,
                                                party_control == "R" ~ 0),
                                rep = case_when(party_control == "D" ~ 0,
                                                party_control == "R" ~ 1))


demrep <- hearings %>% group_by(dois_cited) %>% summarise(dem = sum(dem, na.rm =T), rep = sum(rep,na.rm =T)) %>% mutate(cite_type = case_when(dem > 0 & rep > 0 ~ "Both",
                                                                                                                                              rep > 0 ~ "R", 
                                                                                                                                              dem > 0 ~ "D")) %>% dplyr::select(-dem, -rep)

hearings <- hearings %>% left_join(demrep)

overlap <- hearings %>% summarise(both = length(unique(dois_cited[cite_type == "Both"])),
                                                                                                  D = length(unique(dois_cited[cite_type == "D"])),
                                                                                                  R = length(unique(dois_cited[cite_type == "R"])),
                                                                                                  All = length(unique(dois_cited)),
                                                                                                  both_share = both/All, 
                                                                                                  D_share = D/All, 
                                                                                                  R_share = R/All)
                                       


overlap <- overlap %>% dplyr::select(both_share, D_share, R_share) %>% pivot_longer(cols = 1:3, names_to = "type", values_to = "val")

overlap_time  <- hearings %>% group_by(congress) %>% summarise(both = length(unique(dois_cited[cite_type == "Both"])),
                                                  D = length(unique(dois_cited[cite_type == "D"])),
                                                  R = length(unique(dois_cited[cite_type == "R"])),
                                                  All = length(unique(dois_cited)),
                                                  both_share = both/All, 
                                                  D_share = D/All, 
                                                  R_share = R/All)


overlap_p <- overlap %>% ggplot(aes(x=type, y=val, fill = type)) + geom_bar(stat="identity") + theme_classic() + 
  scale_y_continuous(labels = scales::percent) + 
  scale_x_discrete(labels = c("Both", "Dem only", "Rep only")) + 
  scale_fill_manual("", values=c("#666666", "#156B90","#9A3E25")) + 
  ylab("Percent of cited science") + xlab("") + theme(legend.position = "none")


overlap_time_p <- overlap_time %>% ggplot(aes(x=congress, y=both_share)) + geom_line() + theme_minimal() +
  scale_y_continuous(labels = scales::percent, limits = c(0,1)) + ylab("Percent of cited science") + xlab("Congress") 



p_grid <- cowplot::plot_grid(overlap_p, overlap_time_p)

ggsave("Output/FigS13.pdf", p_grid, width = 11, height = 5)

